- Notifications
You must be signed in to change notification settings - Fork 849
/
Copy pathOML4SQL Data Cleaning Duplicates Removal.dsnb
executable file
·1 lines (1 loc) · 5.4 KB
/
OML4SQL Data Cleaning Duplicates Removal.dsnb
1
[{"layout":null,"template":null,"templateConfig":null,"name":"OML4SQL Data Cleaning Duplicates Removal","description":null,"readOnly":false,"type":"low","paragraphs":[{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":null,"title":null,"message":["%md"," "],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","# OML4SQL Data Cleaning: Duplicates Removal","In this notebook, we demonstrate how to remove duplicate records using Oracle SQL.","","We use the customer insurance lifetime value data set which contains customer financial information, lifetime value, and whether or not the customer bought insurance.","","The dataset `CUSTOMER_INSURANCE_LTV_SQL` is generated by the `\"OML Run-me-first\"` notebook, which `MUST` be run before this notebook.","","---","","###### IMPORTANT: The `\"OML Run-me-first\"` notebook is available under the menu `Templates -> Examples` and is a prerequisite to the current notebook.","","---","Copyright (c) 2024 Oracle Corporation ","###### <a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! 
window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a>","---"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"For more information...","message":["%md"," ","* <a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADB Documentation<\/a>","* <a href=\"https://github.com/oracle-samples/oracle-db-examples/tree/main/machine-learning\" target=\"_blank\">OML folder on Oracle GitHub<\/a>","* <a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a>"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Check the number of distinct customer IDs","message":["%sql","","SELECT COUNT(DISTINCT CUSTOMER_ID)","FROM CUSTOMER_INSURANCE_LTV_SQL;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Check the total number of rows in the table. 
There are duplicated rows.","message":["%sql","","SELECT COUNT(1) FROM CUSTOMER_INSURANCE_LTV_SQL;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Create a view that keeps one row per CUSTOMER_ID","message":["%sql","","CREATE OR REPLACE VIEW CUSTOMER_INSURANCE_LTV_SQL_UNIQUE AS ","SELECT *","FROM CUSTOMER_INSURANCE_LTV_SQL","WHERE rowid IN ("," SELECT max(rowid) row_id"," FROM (SELECT customer_id, rowid"," FROM CUSTOMER_INSURANCE_LTV_SQL)"," GROUP BY customer_id"," ) "," "],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Check the number of rows in the view. 
All rows are distinct.","message":["%sql","","SELECT COUNT(1) FROM CUSTOMER_INSURANCE_LTV_SQL_UNIQUE;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Check the number of distinct CUSTOMER_ID values in the view","message":["%sql","","SELECT COUNT(DISTINCT CUSTOMER_ID) FROM CUSTOMER_INSURANCE_LTV_SQL_UNIQUE;"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","## End of Script"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"}],"version":"6","snapshot":false,"tags":null}]